Retrieve

The fMRI time series are taken from https://paris-saclay-cds.github.io/autism_challenge/. Follow the instructions there to retrieve the relevant fMRI data for the MSDL atlas.

Check out their GitHub repository at: https://github.com/ramp-kits/autism/.

Parse

The following code is heavily based on the code provided by the competition's organizers.


In [1]:
import numpy as np
import pandas as pd
import os

In [2]:
# Root directory of the local copy of the challenge data. The CSV tables are
# read from its 'data' subfolder and the fMRI file names are resolved against
# it. NOTE(review): machine-specific absolute path -- adjust before running.
autism_data = '/home/makism/Temp/autism/'

In [3]:
def parse_dataset(data_dir=None, subjects_file='train.csv'):
    """Build the per-subject feature table and the label vector.

    Reads the participants, anatomy, fMRI-filename and both QC tables from
    ``<data_dir>/data``, joins them column-wise on their index, and keeps
    only the subjects listed in ``subjects_file``.

    Parameters
    ----------
    data_dir : str, optional
        Root directory of the dataset. Defaults to the module-level
        ``autism_data`` path when omitted.
    subjects_file : str, optional
        Header-less CSV inside ``<data_dir>/data`` whose first column lists
        the subject ids to keep (in that order).

    Returns
    -------
    X : pandas.DataFrame
        One row per selected subject; columns are prefixed with
        ``participants_``, ``anatomy_`` or ``fmri_`` (the QC flags are named
        ``anatomy_select`` and ``fmri_select``). The ``participants_asd``
        column is removed.
    y : numpy.ndarray
        Values of the ``participants_asd`` column for the same subjects.
    """
    if data_dir is None:
        # Resolved lazily so callers may pass their own location instead.
        data_dir = autism_data
    csv_dir = os.path.join(data_dir, 'data')

    def _load(filename, prefix=None):
        # Read one table indexed by its first column, optionally prefixing
        # every column name so the sources stay distinguishable after concat.
        frame = pd.read_csv(os.path.join(csv_dir, filename), index_col=0)
        if prefix is not None:
            frame.columns = [prefix + col for col in frame.columns]
        return frame

    # list of the subjects to keep
    subject_id = pd.read_csv(os.path.join(csv_dir, subjects_file), header=None)

    df_participants = _load('participants.csv', prefix='participants_')

    # structural and functional MRI data
    df_anatomy = _load('anatomy.csv', prefix='anatomy_')
    df_fmri = _load('fmri_filename.csv', prefix='fmri_')

    # QC tables: both expose a "select" column, so give each a distinct name
    df_anatomy_qc = _load('anatomy_qc.csv').rename(columns={"select": "anatomy_select"})
    df_fmri_qc = _load('fmri_qc.csv').rename(columns={"select": "fmri_select"})

    X = pd.concat([df_participants, df_anatomy, df_anatomy_qc, df_fmri, df_fmri_qc], axis=1)
    X = X.loc[subject_id[0]]

    # Only the raw values are returned, so the label Series needs no renaming
    # (Series.rename does not accept a ``columns`` keyword).
    y = X['participants_asd']
    X = X.drop('participants_asd', axis=1)

    return X, y.values

In [4]:
# Merged per-subject table and the matching label array for the subjects
# listed in train.csv.
data, labels = parse_dataset()

In [5]:
# Keep only the columns whose name begins with 'fmri'.
fmri_data = data.loc[:, data.columns.str.startswith('fmri')]

In [6]:
# Per-subject file names of the MSDL time series (the 'fmri_msdl' column).
fmri_msdl_filenames = fmri_data['fmri_msdl']

In [7]:
# Load every subject's time series from its header-less CSV, resolving each
# file name against the dataset root with os.path.join instead of string
# concatenation.
fmri_series = [
    pd.read_csv(os.path.join(autism_data, subject_filename), header=None).values
    for subject_filename in fmri_msdl_filenames
]

if len({ts.shape for ts in fmri_series}) <= 1:
    # All matrices share one shape (or there are none): regular stacked array.
    fmri = np.array(fmri_series)
else:
    # NOTE(review): if recordings differ in length, recent NumPy refuses to
    # build a ragged array implicitly; store one matrix per slot instead.
    fmri = np.empty(len(fmri_series), dtype=object)
    for position, ts in enumerate(fmri_series):
        fmri[position] = ts

In [8]:
# Anatomical features: every 'anatomy'-prefixed column except the QC flag.
anatomy = (
    data.loc[:, data.columns.str.startswith('anatomy')]
    .drop(columns='anatomy_select')
)

Store files


In [9]:
# np.save does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('data', exist_ok=True)

# Persist the time series, the anatomical table and the labels as .npy files.
np.save('data/autism_fmri_ts.npy', fmri)
np.save('data/autism_anatomy.npy', anatomy)
np.save('data/autism_labels.npy', labels)